/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "mx_auto_config.h"
#include "myriexpress.h"
#if !MX_OS_WINNT
#include <unistd.h>
#include <sys/time.h>
#else
#include "mx_uni.h"
#endif
#include "mx_byteswap.h"
#include "test_common.h"

/*
 * Compile-time defaults for the command-line options parsed in main().
 */

/* Default endpoint filter; main() hands it to mx_open_endpoint() and
 * mx_connect() unless -f overrides it. */
#define FILTER     0x12345
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define MATCH_VAL  0xabcdef
/* Default for both the local (-e) and the remote (-r) endpoint ID. */
#define DFLT_EID   1
/* Default arithmetic step between successive message lengths (-I). */
#define DFLT_INC   1
/* Default first message length (-S). */
#define DFLT_START 0
/* Default end length (-E); pingpong() stops before reaching this length. */
#define DFLT_END   128
/* Largest value accepted for -E. */
#define MAX_LEN    (1024*1024*1024) 
/* Default number of round trips measured per message length (-N). */
#define DFLT_ITER  1000
/* Default length multiplier (-M); only consulted when the increment is
 * not positive. */
#define DFLT_MULT  1.0

/*
 * Print the command-line synopsis to stderr.
 *
 * Constant lines are emitted with fputs(); lines that show a compiled-in
 * default go through fprintf() so the default is formatted into the text.
 */
void
usage()
{
  FILE *out = stderr;

  /* local endpoint selection */
  fputs("Usage: mx_pingpong_unex [args]\n"
        "-n nic_id - local NIC ID [MX_ANY_NIC]\n"
        "-b board_id - local Board ID [MX_ANY_NIC]\n", out);
  fprintf(out, "-e local_eid - local endpoint ID [%d]\n", DFLT_EID);

  /* remote peer */
  fputs("-d hostname - destination hostname, required for sender\n", out);
  fprintf(out, "-r remote_eid - remote endpoint ID [%d]\n", DFLT_EID);
  fprintf(out, "-f filter - remote filter [%x]\n", FILTER);

  /* message length progression */
  fprintf(out, "-S start_len - starting length [%d]\n", DFLT_START);
  fprintf(out, "-E end_len - ending length [%d]\n", DFLT_END);
  fprintf(out, "-I incr - increment packet length [%d]\n", DFLT_INC);
  fputs("-M mult - length multiplier, overrides -I\n", out);
  fprintf(out, "-N iterations - iterations per length [%d]\n", DFLT_ITER);

  /* behaviour switches */
  fputs("-V - verify msg content [OFF]\n"
        "-w - block rather than poll\n"
        "-s - use synchronous send instead of regular\n"
        "-m nsegs - split the buffer into multiple segments [1]\n"
        "-h - help\n"
        "\tIf -M specified, length progression is geometric, "
        "else arithmetic\n", out);
}

/*
 * Global verification switch.  main() sets it to 1 when -V is given;
 * pingpong() reads it to decide whether the sender stamps each message
 * with an index and checksum and whether the receiver checks them.
 */
int Verify;		/* If true, verify packet contents */

static inline
mx_return_t
pingpong(uint32_t sender,
	 mx_endpoint_t ep,
	 mx_endpoint_addr_t dest,
	 int iter,
	 uint32_t start_len,
	 uint32_t end_len,
	 int inc,
	 double mult,
	 int wait,
	 int sync,
	 uint32_t nsegs)
{
  mx_status_t stat;
  mx_request_t sreq, rreq;
  mx_segment_t * segs;
  unsigned char *buff;
  struct timeval start_time, end_time;
  unsigned int i, cur_len, usec, tmp_len, seg_len;
  double lat, bw;
  uint64_t sseq, rseq;
  uint32_t result;
  
  sseq = 0x1000;
  rseq = 0;

  /* allocate buffers for our data */
  buff = (unsigned char *) malloc(end_len);
  if (buff == NULL) {
    perror("malloc full");
    return MX_NO_RESOURCES;
  }

  /* allocate segments */
  segs = (mx_segment_t *) malloc(nsegs * sizeof(mx_segment_t));
  if (segs == NULL) {
    perror("malloc segs");
    return MX_NO_RESOURCES;
  }

  if (sender) {
    /* print header */
    printf("Running %d iterations.\n", iter);
    printf("   Length   Latency(us)    Bandwidth(MB/s)\n");
  }

  /* loop through each sample length */
  for (cur_len = start_len; cur_len < end_len; ) {

    /* set up segment list */
    tmp_len = cur_len;
    seg_len = cur_len / nsegs;
    for(i=0; i<nsegs-1; i++) {
      segs[i].segment_ptr = buff + seg_len * i;
      segs[i].segment_length = seg_len;
    }
    segs[nsegs-1].segment_ptr = buff + cur_len - tmp_len;
    segs[nsegs-1].segment_length = tmp_len;

    /* get starting time */
    gettimeofday(&start_time, NULL);

    /* loop through the iterations */
    for (i=0; i<iter; i++) {

      if (sender) {
	/* if verifying, fill buffer with a value and csum */
	if (Verify && cur_len >= 4) {
	  unsigned char *p = buff;
	  int j;
	  unsigned char sum = 0;
	  *(unsigned short *)p = htons((i+1) & 0xFFFF);
	  sum += *p++;
	  sum += *p++;
	  for (j=0; j<cur_len-3; ++j) {
	    sum += (*p++ = rand());
	  }
	  *p = sum;
	}

	/* post send */
	if (sync)
	  mx_issend(ep, segs, nsegs, dest, sseq, NULL, &sreq);
	else
	  mx_isend(ep, segs, nsegs, dest, sseq, NULL, &sreq);
	sseq++;
	
	/* wait for the send to complete */
	if (wait)
	  mx_wait(ep, &sreq, MX_INFINITE, &stat, &result);
	else {
	  do { 
	    mx_test(ep, &sreq, &stat, &result);
	  } while (!result);  
	}

	/* post receive */
	mx_irecv(ep, segs, nsegs, rseq, MX_MATCH_MASK_NONE, 0, &rreq);
	rseq++;
	
	/* wait for the receive to complete */
	if (wait)
	  mx_wait(ep, &rreq, MX_INFINITE, &stat, &result);
	else {
	  do { 
	    mx_test(ep, &rreq, &stat, &result);
	  } while (!result);  
	}


      } else {

	/* post receive */
	mx_irecv(ep, segs, nsegs, sseq, MX_MATCH_MASK_NONE, 0, &rreq);
	sseq++;

	/* wait for receive to complete */
	if (wait)
	  mx_wait(ep, &rreq, MX_INFINITE, &stat, &result);
	else {
	  do { 
	    mx_test(ep, &rreq, &stat, &result);
	  } while (!result);  
	}


	/* verify contents if needed */
	if (Verify) {
	  if (stat.xfer_length != cur_len) {
	    fprintf(stderr, "Bad len from recv, %d should be %d\n",
	    	stat.xfer_length, cur_len);
	  }
	  if (cur_len >= 4) {
	    unsigned char *p = buff;
	    int j;
	    unsigned char sum = 0;

	    if (htons(*(unsigned short *)p) != ((i+1) & 0xFFFF)) {
	      fprintf(stderr, "len=%d bad index at start of pkt, %d s.b. %d\n",
		      cur_len, (unsigned int) htons(*(unsigned short *)p), 
		      ((i+1) & 0xFFFF));
	    } else {
	      for (j=0; j<cur_len-1; ++j) {
		sum += *p++;
	      }
	      if (*p != sum) {
	        fprintf(stderr, "packet checksum byte wrong, %x should be %x, current length = %d\n",
			(unsigned int)*p, sum, cur_len);
	      }
	    }
	  }
	}
	
	/* post send */
	if (sync)
	  mx_issend(ep, segs, nsegs, stat.source, rseq, NULL, &sreq);
	else
	  mx_isend(ep, segs, nsegs, stat.source, rseq, NULL, &sreq);
	rseq++;

	/* wait for send to complete */
	if (wait)
	  mx_wait(ep, &sreq, MX_INFINITE, &stat, &result);
	else {
	  do { 
	    mx_test(ep, &sreq, &stat, &result);
	  } while (!result);  
	}

      }
    }

    /* get ending time */
    gettimeofday(&end_time, NULL);
    
    usec = end_time.tv_usec - start_time.tv_usec;
    usec += (end_time.tv_sec - start_time.tv_sec) * 1000000;
    lat = (double) usec / iter / 2;
    bw =  (2.0 * iter * cur_len) / (double) usec;

    if (sender) {
      printf("%9d   %9.3f       %8.3f\n", cur_len, lat, bw);
    }

    /* update current length */
    if (inc > 0) {
      cur_len += inc;
    } else {
      int new_len;

      new_len = cur_len * mult;
      if (new_len > cur_len) {
	cur_len = new_len;
      } else {
	++cur_len;
      }
    }
  }

  free(buff);
  free(segs);

  return MX_SUCCESS;
}

int main(int argc, char **argv)
{
  mx_endpoint_t ep;
  uint64_t nic_id;
  uint32_t my_eid;
  uint64_t his_nic_id;
  uint32_t board_id;
  uint32_t filter;
  uint16_t his_eid;
  mx_endpoint_addr_t his_addr;
  char *rem_host;
  int inc;
  double mult;
  int start_len;
  int end_len;
  int iter;
  int c;
  int do_wait;
  int do_sync;
  uint32_t nsegs;
  extern char *optarg;

#if DEBUG
  extern int mx_debug_mask;
  mx_debug_mask = 0xFFF;
#endif
  mx_init();

  /* set up defaults */
  rem_host = NULL;
  filter = FILTER;
  inc = 0;
  my_eid = DFLT_EID;
  his_eid = DFLT_EID;
  board_id = MX_ANY_NIC;
  inc = DFLT_INC;
  end_len = DFLT_END;
  start_len = DFLT_START;
  iter = DFLT_ITER;
  mult = DFLT_MULT;
  do_wait = 0;
  do_sync = 0;
  nsegs = 1;

  while ((c = getopt(argc, argv, "hd:e:n:b:r:f:S:E:I:M:N:Vwsm:")) != EOF) switch(c) {
  case 'd':
    rem_host = optarg;
    break;
  case 'e':
    my_eid = atoi(optarg);
    break;
  case 'f':
    filter = atoi(optarg);
    break;
  case 'n':
    sscanf(optarg, "%"SCNx64, &nic_id);
    mx_nic_id_to_board_number(nic_id, &board_id);
    break;
  case 'b':
    board_id = atoi(optarg);
    break;
  case 'r':
    his_eid = atoi(optarg);
    break;
  case 'S':
    start_len = atoi(optarg);
    break;
  case 'E':
    end_len = atoi(optarg);
    if (end_len > MAX_LEN) {
      fprintf(stderr, "end_len too large, max is %d\n", MAX_LEN);
      exit(1);
    }
    break;
  case 'M':
    mult = atof(optarg);
    inc = 0;
    break;
  case 'I':
    inc = atoi(optarg);
    break;
  case 'N':
    iter = atoi(optarg);
    break;
  case 'V':
    Verify = 1;
    break;
  case 'w':
    do_wait = 1;
    break;
  case 's':
    do_sync = 1;
    break;
  case 'm':
    nsegs = atoi(optarg);
    break;
  case 'h':
  default:
    usage();
    exit(1);
  }

  mx_open_endpoint(board_id, my_eid, filter, NULL, 0, &ep);
  
  /* If no host, we are receiver */
  if (rem_host == NULL) {
    char hostname[MX_MAX_HOSTNAME_LEN];
    mx_endpoint_addr_t me;
    mx_get_endpoint_addr(ep, &me);
    mx_decompose_endpoint_addr(me, &nic_id, &my_eid);
    mx_nic_id_to_hostname(nic_id, hostname);
    printf("Starting pingpong receiver on %s, endpoint=%d\n", hostname, my_eid);
    do {
      mx_request_t req;
      mx_status_t status;
      uint32_t eid;
      mx_segment_t seg;
      uint32_t result;
      uint64_t nic;
      uint32_t junk;

      seg.segment_ptr = &eid;
      seg.segment_length = sizeof (eid);
      mx_irecv(ep, &seg, 1, 
		    UINT64_C(0x1111000011110000),MX_MATCH_MASK_NONE,
		    0, &req);
      mx_wait(ep, &req, MX_INFINITE, &status, &result);
      assert(result == 1);


      if (Verify) printf("Verifying results\n");

      mx_decompose_endpoint_addr(status.source, &nic, &junk);
      mx_connect (ep, nic, ntohl(eid), filter, MX_INFINITE, &his_addr);

      pingpong(0, ep, his_addr, iter, start_len, end_len,
	       inc, mult, do_wait, do_sync, nsegs);
    } while (0);

  /* remote hostname implies we are sender */
  } else {
    mx_request_t req;
    mx_status_t status;
    mx_segment_t seg;
    uint32_t result;
    mx_endpoint_addr_t me;
    uint32_t eid;
    uint64_t nic;

    mx_get_endpoint_addr(ep, &me);
    mx_decompose_endpoint_addr(me, &nic, &eid);
    eid = htonl(eid);

    /* get address of destination */
    mx_hostname_to_nic_id(rem_host, &his_nic_id);
    mx_connect(ep, his_nic_id, his_eid, filter, MX_INFINITE, &his_addr);
    seg.segment_ptr = &eid;
    seg.segment_length = sizeof (eid);
    mx_isend(ep, &seg, 1, his_addr, UINT64_C(0x1111000011110000), 0, &req);
    mx_wait(ep, &req,MX_INFINITE, &status, &result);

    printf("Starting pingpong send to host %s\n", rem_host);
    if (Verify) printf("Verifying results\n");

    /* start up the sender */
    pingpong(1, ep, his_addr, iter, start_len, end_len,
	     inc, mult, do_wait, do_sync, nsegs);
  }

  
  mx_close_endpoint(ep);
  mx_finalize();
  return 0;
}
